import scrapy


class DoubanScrawlSpider(scrapy.Spider):
    """Spider that follows the links listed in the ``#screening`` section.

    Crawling starts at ``https://movie.douban.com/``. Every entry of the
    ``#screening`` list that carries a link is scheduled as a new request.
    """

    name = "douban_scrawl"
    start_urls = ["https://movie.douban.com/"]

    def parse(self, response):
        """Schedule one request per linked entry in the ``#screening`` list.

        Args:
            response: The downloaded page to scan for entry links.

        Yields:
            scrapy.Request: One request per entry that has a non-empty
            ``href``; its response is handed back to this same method.
        """
        print("******开始爬取豆瓣网********")
        for entry in response.xpath('//*[@id="screening"]/div[2]/ul/li'):
            href = entry.xpath('./ul/li[1]/a/@href').get()
            if not href:
                # No link (or an empty one) means there is nothing to follow.
                continue

            # Manual request dispatch: ``callback`` names the method that will
            # parse the response. urljoin() resolves relative hrefs against the
            # current page, since Request rejects URLs without a scheme.
            # NOTE(review): the callback is this same method, so followed pages
            # are scanned with the same XPath -- confirm whether a dedicated
            # detail-page parser was intended.
            yield scrapy.Request(
                url=response.urljoin(href),
                callback=self.parse,
            )
